Code
# Set global knitr chunk options: echo the code, suppress warnings and
# messages in the rendered output, centre figures, and use fig.retina = 2.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# NOTE: the workspace is deliberately not cleared with rm(list = ls()).
# That call deletes the user's objects when the chunk is run interactively
# and still leaves attached packages and options in place, so it does not
# give a clean session; restart R for a genuinely fresh state.
library(tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library (ggplot2)
#library(table1)
library (gt)
library (survival)
library (data.table)
library (randomForest)
library (grf)
library (policytree)
library (DiagrammeR)
#library(grid)
#library(forestploter)
#library(randomizr)
# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)
library (forestsearch)
library (weightedsurv)
# Use the minimal ggplot2 theme (base font size 12) for every plot below
default_theme <- theme_minimal(base_size = 12)
theme_set(default_theme)
Summary
Reproducing main GBSG analysis
Data setup
Code
# Build the analysis data set from `gbsg` (expected to be supplied by an
# attached package -- presumably survival; confirm).
df.analysis <- gbsg
df.analysis <- within(df.analysis, {
  # Sequential numeric subject identifier; seq_len() stays correct for a
  # zero-row data frame, where 1:nrow() would yield c(1, 0)
  id <- as.numeric(seq_len(nrow(df.analysis)))
  # time to months (30.4375 = 365.25 / 12, i.e. average days per month)
  time_months <- rfstime / 30.4375
  # 0/1 indicator for grade 3
  grade3 <- ifelse(grade == "3", 1, 0)
  # Copy of `hormon` under a generic name
  treat <- hormon
})

# Column names shared by the analysis calls below
confounders.name <- c("age", "meno", "size", "grade3", "nodes", "pgr", "er")
outcome.name <- c("time_months")
event.name <- c("status")
id.name <- c("id")
treat.name <- c("hormon")
Kaplan-Meier curves and baseline summary
Code
# Counting-process data for the Kaplan-Meier display (by.risk = 6)
dfcount <- df_counting(
  df = df.analysis,
  by.risk = 6,
  tte.name = outcome.name,
  event.name = event.name,
  treat.name = treat.name
)

# Weighted KM curves with confidence intervals and the log-rank result
plot_weighted_km(
  dfcount,
  conf.int = TRUE,
  show.logrank = TRUE,
  ymax = 1.05,
  xmed.fraction = 0.775,
  ymed.offset = 0.125
)
Code
# Baseline characteristics by treatment arm
create_summary_table(
  data = df.analysis,
  treat_var = treat.name,
  table_title = "GBSG Characteristics by Treatment Arm",
  vars_continuous = c("age", "nodes", "size", "er", "pgr"),
  vars_categorical = c("grade", "grade3"),
  font_size = 12
)
Characteristic
Control (n=440)
Treatment (n=246)
P-value
SMD
age
Mean (SD)
51.1 (10.0)
56.6 (9.4)
<0.001
0.57
nodes
Mean (SD)
4.9 (5.6)
5.1 (5.3)
0.665
0.03
size
Mean (SD)
29.6 (14.4)
28.8 (14.1)
0.470
0.06
er
Mean (SD)
79.7 (124.2)
125.8 (191.1)
<0.001
0.30
pgr
Mean (SD)
102.0 (170.0)
124.3 (249.7)
0.213
0.11
grade
0.273
0.06
1
48 (10.9%)
33 (13.4%)
2
281 (63.9%)
163 (66.3%)
3
111 (25.2%)
50 (20.3%)
grade3
0.174
0.05
0
329 (74.8%)
196 (79.7%)
1
111 (25.2%)
50 (20.3%)
GRF analysis
Code
## GRF
# Subgroup search via grf.subg.harm.survival() on the analysis data,
# with tree depth up to 2 and verbose output
grf_est1 <- grf.subg.harm.survival(
  data = df.analysis,
  confounders.name = confounders.name,
  outcome.name = outcome.name,
  event.name = event.name,
  id.name = id.name,
  treat.name = treat.name,
  maxdepth = 2,
  n.min = 60,
  dmin.grf = 12,
  frac.tau = 0.6,
  details = TRUE
)
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
Code
# NOTE: in general, for GRF trees
#   leaf1 --> recommend control
#   leaf2 --> recommend treatment
# Tree depth 1
leaf_labels <- c("Control", "Treat")
plot(grf_est1$tree1, leaf.labels = leaf_labels)
Code
# Tree depth 2 (leaf labels in the order control, treatment)
plot(
  grf_est1$tree2,
  leaf.labels = c("Control", "Treat")
)
Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processing: register the doFuture backend and doRNG
library(doFuture)
library(doRNG)
registerDoFuture()
registerDoRNG()

# Run ForestSearch (maxk = 2) on the analysis data and time it.
# The column names come from the shared *.name variables defined in the data
# setup chunk, so this call stays in step with the GRF call, which already
# uses them, instead of repeating the same strings as literals.
# NOTE(review): workers = 30 is hard-coded and may exceed the cores available
# on other machines -- confirm how forestsearch() handles that.
system.time({
  fs <- forestsearch(
    df.analysis,
    confounders.name = confounders.name,
    outcome.name = outcome.name,
    treat.name = treat.name,
    event.name = event.name,
    id.name = id.name,
    potentialOutcome.name = NULL,
    df.test = NULL,
    flag_harm.name = NULL,
    hr.threshold = 1.25,
    hr.consistency = 1.0,
    pconsistency.threshold = 0.90,
    sg_focus = "maxSG",
    max_subgroups_search = 30,
    use_twostage = TRUE,
    showten_subgroups = TRUE,
    details = TRUE,
    conf_force = NULL,
    cut_type = "default",
    use_grf = TRUE,
    plot.grf = TRUE,
    use_lasso = TRUE,
    maxk = 2,
    fs.splits = 1000,
    n.min = 60,
    d0.min = 10,
    d1.min = 10,
    plot.sg = TRUE,
    by.risk = 6,
    parallel_args = list(plan = "callr", workers = 30, show_message = TRUE)
  )
})
=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30
Maximum total splits: 1000
Batch size: 20
================================================
GRF stage for cut selection with dmin, tau = 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
GRF cuts identified: 3
Cuts: er <= 0, age <= 50, age <= 43
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
## Prior to lasso: age size nodes pgr er
#### Lasso selection results
7 x 1 sparse Matrix of class "dgCMatrix"
s0
age .
meno .
size 0.005433435
grade3 0.178139021
nodes 0.049670523
pgr -0.001812895
er .
Cox-LASSO selected: size grade3 nodes pgr
Cox-LASSO not selected: age meno er
### End Lasso selection
## After lasso: size nodes pgr
Default cuts included from Lasso: size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr)
Categorical after Lasso: grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
Factors included per GRF (not in lasso) er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 16 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 16
Valid cuts: 16
Errors: 0
✓ All 16 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 16
[1] "er <= 0" "age <= 50" "age <= 43" "size <= 29.3" "size <= 25"
[6] "size <= 20" "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1"
[11] "nodes <= 7" "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8"
[16] "grade3"
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 528
Events criteria: control >= 10 , treatment >= 10
Subgroup search completed in 0.01 minutes
Found 13 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 13
# of unique initial candidates: 13
# Restricting to top stop_Kgroups = 30
# of candidates to evaluate: 13
Algorithm: Two-stage sequential
Stage 1 splits: 30
Screen threshold: 0.763
Max total splits: 1000
Batch size: 20
Parallel processing: callr with 30 workers
*** Subgroup found: {er <= 0}
% consistency criteria met= 0.93
SG focus= maxSG
Subgroup Consistency Minutes= 0.045
Algorithm used: Two-stage sequential
Candidates evaluated: 13
Candidates passed: 7
Subgroup found (FS) with sg_focus='maxSG'
Selected subgroup: {er <= 0}
Minutes forestsearch overall = 0.06
Consistency algorithm used: twostage
user system elapsed
19.018 1.572 3.740
Code
# Return to sequential execution after the parallel search
plan("sequential")

# Results for estimation (training) data; which_df = "est" is the default
res_tabs <- sg_tables(
  fs,
  ndecimals = 3,
  which_df = "est"
)
res_tabs$sg10_out
Two-factor subgroups (maxk=2)
{er <= 0}
82
45
16
1.951
0.930
{er <= 0}
{pgr <= 32.5}
75
41
16
2.222
0.990
{er <= 0}
!{age <= 43}
68
38
14
2.164
0.960
{er <= 0}
{pgr <= 7}
64
34
13
1.992
0.920
{er <= 0}
{size <= 35}
61
34
15
2.537
1.000
{er <= 0}
{nodes <= 7}
61
31
11
2.335
1.000
{er <= 0}
!{size <= 20}
61
35
12
2.054
0.930
Search Configuration: Single-factor candidates (L) = 32; Maximum combinations evaluated = 528; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 13; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
Training data estimates
ITT
686 (100.0%)
246 (35.9%)
299 (43.6%)
66.3
50.2
7.8
0.69 (0.54, 0.89)
Questionable
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
Recommend
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
Bootstrap Inference
Code
# output_dir <- "dev/vignettes-working/applications/gbsg/results"
output_dir <- "results/"
# Flag recording whether the output directory exists
save_results <- dir.exists(output_dir)

# patchwork is needed for the combined bootstrap plot (per the original note,
# the plot is not produced when the package is unavailable)
library(patchwork)

# Number of bootstrap samples
NB <- 100

# Bootstrap bias correction for the fitted ForestSearch object, timed
system.time({
  fs_bc <- forestsearch_bootstrap_dofuture(
    fs.est = fs,
    nb_boots = NB,
    show_three = FALSE,
    details = TRUE
  )
})
Ystar matrix generated should be 'boots x N': 100 x 686
ForestSearch parameters for bootstrap iterations:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
Bootstrap-specific overrides:
- grf_res: NULL (forces re-selection)
- grf_cuts: NULL (forces re-selection)
- parallel_args: sequential (prevents nested parallelism)
- details: FALSE (suppressed in workers)
- plot.sg: FALSE
- plot.grf: FALSE
=== Bootstrap Analysis Complete ===
Success rate: 87.0% (87/100)
H (Questionable) Estimates:
Unadjusted: 1.95 (1.04,3.67)
Bias-corrected: 1.56 (0.71,3.43)
Hc (Recommend) Estimates:
Unadjusted: 0.61 (0.47,0.8)
Bias-corrected: 0.64 (0.5,0.81)
===================================
user system elapsed
976.284 19.248 92.981
Code
# Return to sequential execution after the bootstrap
plan("sequential")

# Save the data, the fit and the bootstrap results when saving is enabled
if (save_results) {
  rdata_name <- paste0("gbsg_k2b_B=", format(NB), ".RData")
  filename <- file.path(output_dir, rdata_name)
  save(list = c("df.analysis", "fs", "fs_bc"), file = filename)
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg_k2b_B=100.RData
Diagnostics and Summaries
Code
# Optionally reload previously saved bootstrap results.
# load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")
output_dir <- "results/"
# NB <- 1000
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(
    output_dir,
    paste0("gbsg_k2b_B=", format(NB), ".RData")
  )
  # The directory can exist without this particular file (e.g. a different
  # NB was saved); only load when the file is really there instead of
  # failing inside load().
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message(
      "Saved results not found at ", filename,
      "; continuing with the objects already in the session."
    )
  }
}
# Summarise the bootstrap results on the hazard-ratio scale, with plots
summaries <- summarize_bootstrap_results(
  sgharm = fs$sg.harm,
  boot_results = fs_bc,
  create_plots = TRUE,
  est.scale = "hr"
)
===============================================================
BOOTSTRAP ANALYSIS SUMMARY
===============================================================
BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
Total iterations: 100
Successful subgroup ID: 87 (87.0%)
Failed to find subgroup: 13 (13.0%)
TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
Total bootstrap time: 1.53 minutes (0.03 hours)
Average per iteration: 0.02 min (0.9 sec)
Projected for 1000 boots: 15.27 minutes (0.25 hours)
Per-iteration timing:
Mean: 0.16 min (9.6 sec)
Median: 0.16 min (9.6 sec)
Std Dev: 0.09 minutes
Range: [0.03, 0.39] minutes
IQR: [0.09, 0.23] minutes
ForestSearch timing (successful iterations only):
Iterations with FS: 100 (100.0%)
Mean FS time: 0.16 min (9.6 sec)
Median FS time: 0.16 min (9.6 sec)
Total FS time: 15.94 minutes
FS time % of total: 1044.2%
Overhead timing (Cox models, bias correction, etc.):
Mean overhead: 0.00 min (0.0 sec)
Median overhead: 0.00 min (0.0 sec)
Total overhead: 0.02 minutes
Overhead % of total: 1.5%
PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
Performance rating: ✓✓✓ Excellent
Average iteration speed: 0.9 seconds
===============================================================
Code
# Pull the `table` element out of the bootstrap summaries and display it
sg_tab <- summaries$table
sg_tab
Bootstrap bias-corrected estimates (100 iterations)
N
NT
Events
MedT
MedC
RMSTd
HR (95% CI)†
HR‡ (95% CI)
Qstnbl
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
1.56 (0.71,3.43)
Recmnd
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
0.64 (0.5,0.81)
Note : Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 87.0% of bootstrap samples.
Code
# Event-count summary for the bootstrap results, with threshold = 12
event_summary <- summarize_bootstrap_events(
  fs_bc,
  threshold = 12
)
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 100
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 87 (87.0%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 1 (1.1% of successful)
Treatment arm <12 events: 1 (1.1% of successful)
Either arm <12 events: 2 (2.3% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
# Display the bootstrap diagnostics table
summaries$diagnostics_table_gt
Analysis of 100 bootstrap iterations
Success Rate
Total iterations
100
Successful subgroup ID
87 (87.0%)
Failed to find subgroup
13 (13.0%)
Success rating
Good ✓✓
Subgroup H (Questionable)
Unadjusted estimate
1.95 (1.04, 3.67)
Bias-corrected estimate
1.56 (0.71, 3.43)
Bias correction impact
20.2%
CI width change
2.64 -> 2.72
Subgroup Hc (Recommend)
Unadjusted estimate
0.61 (0.47, 0.80)
Bias-corrected estimate
0.64 (0.50, 0.81)
Bias correction impact
3.4%
CI width change
0.33 -> 0.32
Bootstrap Quality: H
Valid iterations
87
Mean (SD)
0.44 (0.40)
Coefficient of variation
90.0%
Skewness
0.01
Bootstrap Quality: Hc
Valid iterations
87
Mean (SD)
-0.45 (0.21)
Coefficient of variation
45.5%
Skewness
0.27
Search Performance
Mean max HR found
3.32 (1.37)
Mean factors evaluated
39.4
Mean combinations tried
832
Proportion at maxk
--
Interpretation Guide:
✓ Good stability : Subgroup is reliably identified in most bootstrap samples.
⚠ High variability : Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
# Display the `original_agreement` element of the subgroup summary
summaries$subgroup_summary$original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 100
2: Successful iterations 87
3: Failed iterations (no subgroup) 13
4: Exact match with original 5 (5.7%)
5: Different from original 82 (94.3%)
Code
# Display the `factor_presence` element of the subgroup summary
summaries$subgroup_summary$factor_presence
Rank Factor Count Percent
6 1 pgr 62 71.264368
1 2 age 38 43.678161
7 3 size 29 33.333333
2 4 er 18 20.689655
5 5 nodes 11 12.643678
3 6 grade3 8 9.195402
4 7 meno 1 1.149425
Code
# Display the `factor_presence_specific` element of the subgroup summary
summaries$subgroup_summary$factor_presence_specific
Rank Base_Factor Factor_Definition Count Percent
34 1 age {age <= 47} 9 10.34483
Forest Search K-fold cross-validation
Code
# Optionally reload the saved bootstrap results before cross-validation.
output_dir <- "results/"
load_results <- dir.exists(output_dir)
# NB <- 1000
if (load_results) {
  filename <- file.path(
    output_dir,
    paste0("gbsg_k2b_B=", format(NB), ".RData")
  )
  # The directory can exist without this particular file; only load when the
  # file is really there instead of failing inside load().
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message(
      "Saved results not found at ", filename,
      "; continuing with the objects already in the session."
    )
  }
}
# Original note: Kfolds = n (default to n-fold cross-validation);
# 30 folds are requested here.
fs_OOB <- NULL
kfold_parallel <- list(plan = "callr", workers = 36, show_message = TRUE)
fs_OOB <- forestsearch_Kfold(
  fs.est = fs,
  details = TRUE,
  Kfolds = 30,
  parallel_args = kfold_parallel
)
Cross-validation setup:
- Observations: 686
- Folds: 30
- Fold sizes (range): 22-23
ForestSearch parameters for CV folds:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Cross-validation complete:
- Time: 0.28 minutes
- Subgroup found in 60 % of folds
Any found: 0.6
Exact match: 0.3666667
At least 1 match: 0.3666667
Cov 1 any: 0.3666667
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.3666667
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.5121951 0.9635762 0.65625 0.9356913
Code
# Back to a single (sequential) worker
plan(sequential)

# Summarise the K-fold results with verbose output
summary_OOB <- forestsearch_KfoldOut(
  res = fs_OOB,
  details = TRUE,
  outall = TRUE
)
Any found: 0.6
Exact match: 0.3666667
At least 1 match: 0.3666667
Cov 1 any: 0.3666667
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.3666667
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.5121951 0.9635762 0.65625 0.9356913
Subgroup n n1 m1 m0 RMST
Overall "ITT" "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0 "Not recommend" "82 (12.0%)" "26 (31.7%)" "22.9" "43.7" "-14"
KfA_0 "Not recommend" "64 (9.3%)" "19 (29.7%)" "37.7" "42" "-0.34"
FA_1 "Recommend" "604 (88.0%)" "220 (36.4%)" "66.7" "52.6" "9.3"
KfA_1 "Recommend" "622 (90.7%)" "227 (36.5%)" "66.7" "52.6" "7.8"
Hazard ratio
Overall "0.69 (0.54, 0.89)"
FA_0 "1.95 (1.04, 3.67)"
KfA_0 "0.91 (0.44, 1.88)"
FA_1 "0.61 (0.47, 0.80)"
KfA_1 "0.69 (0.53, 0.89)"
Code
# Frequency table of the first column of SGs_found
table(summary_OOB$SGs_found[, 1])
!{age <= 43} {age <= 48} {age <= 50} {er <= 0}
4 1 2 11
Code
# Frequency table of the second column of SGs_found
table(summary_OOB$SGs_found[, 2])
!{age <= 43} {age <= 47} {age <= 48} {pgr <= 33}
2 3 1 1
Code
# Repeated 10-fold cross-validation: Ksims simulations of 10 folds each
Ksims <- 30
fs_ten <- forestsearch_tenfold(
  fs.est = fs,
  sims = Ksims,
  Kfolds = 10,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Starting repeated K-fold cross-validation:
- Simulations: 30
- Folds per simulation: 10
- Workers: 13
ForestSearch parameters for CV folds:
- sg_focus: maxSG
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Repeated K-fold CV complete:
- Time: 2.15 minutes
- Successful simulations: 30 / 30
- Projected hours per 100 sims: 0.12
Code
# Back to a single (sequential) worker
plan(sequential)

# Print the find_summary element of the repeated-CV results
print(fs_ten$find_summary)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact
0.7 0.3 0.3 0.3 NA 0.0 0.3
Cov2 exact
NA
Code
# Print the sens_summary element of the repeated-CV results
print(fs_ten$sens_summary)
sens_H sens_Hc ppv_H ppv_Hc
0.3841463 0.9403974 0.4362162 0.9179549
Code
# First rows of the sens_out element
sens_head <- head(fs_ten$sens_out)
print(sens_head)
sens_H sens_Hc ppv_H ppv_Hc
[1,] 0.4390244 0.9536424 0.5625000 0.9260450
[2,] 0.4024390 0.9271523 0.4285714 0.9195402
[3,] 0.3902439 0.9354305 0.4507042 0.9186992
[4,] 0.4024390 0.9519868 0.5322581 0.9214744
[5,] 0.2682927 0.9536424 0.4400000 0.9056604
[6,] 0.2804878 0.9254967 0.3382353 0.9045307
Code
# First rows of the find_out element
find_head <- head(fs_ten$find_out)
print(find_head)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.7 0.4 0.4 0.4 NaN 0 0.4 NaN
[2,] 0.7 0.3 0.3 0.3 NaN 0 0.3 NaN
[3,] 0.8 0.3 0.3 0.3 NaN 0 0.3 NaN
[4,] 0.6 0.3 0.3 0.3 NaN 0 0.3 NaN
[5,] 0.5 0.2 0.2 0.2 NaN 0 0.2 NaN
[6,] 0.7 0.1 0.1 0.1 NaN 0 0.1 NaN
Code
# Save all results (only when the output directory already exists)
output_dir <- "results/"
save_results <- dir.exists(output_dir)
if (save_results) {
  rdata_name <- paste0("gbsg_k2b_CV=", format(Ksims), ".RData")
  filename <- file.path(output_dir, rdata_name)
  save(
    list = c("df.analysis", "fs", "fs_bc", "fs_ten", "fs_OOB"),
    file = filename
  )
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg_k2b_CV=30.RData
Code
# Optionally reload the saved cross-validation results.
# Ksims <- 200
output_dir <- "results/"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(
    output_dir,
    paste0("gbsg_k2b_CV=", format(Ksims), ".RData")
  )
  # The directory can exist without this particular file (e.g. a different
  # Ksims was saved); only load when the file is really there instead of
  # failing inside load().
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message(
      "Saved results not found at ", filename,
      "; continuing with the objects already in the session."
    )
  }
}
# Define subgroups to display.
# Every entry is a "reference" subgroup whose display name is its subset
# expression, so the list is built from a named vector of expressions.
subgroups <- lapply(
  c(
    age_gt65 = "age > 65",
    age_lt65 = "age <= 65",
    pgr_positive = "pgr > 0",
    pgr_negative = "pgr <= 0"
  ),
  function(expr) {
    list(subset_expr = expr, name = expr, type = "reference")
  }
)
# Collect the fitted objects passed to the forest plot
fs_results_all <- list(
  fs.est = fs,
  fs_bc = fs_bc,
  fs_OOB = fs_OOB,
  fs_kfold = fs_ten
)

# Create the forest plot
result <- plot_subgroup_results_forestplot(
  fs_results = fs_results_all,
  df_analysis = df.analysis,
  subgroup_list = subgroups,
  outcome.name = "time_months",
  event.name = "status",
  treat.name = "hormon",
  E.name = "Hormon",
  C.name = "CT",
  ci_column_spaces = 25
)

# Display the plot
plot(result$plot)